/*******************************************************************************
																			
	DESCRIPTION: This do-file investigates how predictive power (R-squared) changes
				when a model trained on individuals from year Y is used to make
				predictions for individuals from year X.

*******************************************************************************/

* Reset the session before running (must come before -run- below, because
* -clear all- also drops previously defined programs).
clear all
* Prefix used in the names of the exported graph files at the end of this do-file.
global id_code 117

* Import programs:
* NOTE(review): ${code} (and ${data}, ${output} used further down) are not defined
* in this file; presumably they are set by a master do-file before this one runs.
* The -stats- command called in the loop below is presumably defined in this file -
* confirm.
run  "${code}/Output Generation/108_0_MainStatistics_Program.do"

* NOTE(review): no random-number call is visible in this do-file; the seed
* presumably matters only if the imported programs draw random numbers - confirm.
set seed 2110

/*******************************************************************************
*	1. Compute statistics
********************************************************************************/

* Model specification and estimation sample that identify the prediction files.
local model        Full
local model_sample Full

* Results frame: one row per (model year, sample year) pair.
* r_sq is kept in double precision; the other columns use the default type.
frame create stats year_model year_sample n_obs double(r_sq)

* Loop over every combination of training year (outer) and sample year (inner),
* load the corresponding prediction file, run -stats- on the observed outcome and
* its prediction, and post the returned r(n_ex) and r(r2_ex) to the results frame.
forvalues yr_model = 1992/2016 {
	forvalues yr_sample = 1992/2016 {

		display "Computing `yr_sample' with `yr_model' model."

		local pred_file "${data}/003_MainWithEnsemblePred_`model_sample'_`yr_sample'Individuals_TrainedOn`yr_model'modelIndividuals_`model'.dta"
		use "`pred_file'", clear

		* -stats- is not defined in this file (presumably loaded by the -run- above).
		stats emplAft6M_0M_In p_emplAft6M_0M_In
		frame post stats (`yr_model') (`yr_sample') (`r(n_ex)') (`r(r2_ex)')
	}
}

frame change stats

* Obtain the change in the R-squared relative to the base year, holding the sample
* of individuals fixed:
*   base_r_sq = R-squared of the model trained on the sample's own year
*               (year_model == year_sample), copied to all rows of that sample year;
*   rel_r_sq  = r_sq / base_r_sq.
* The baseline is picked with cond(): rows where year_model != year_sample evaluate
* to missing and are ignored by mean(), so the group mean equals the own-year value.
* Both new variables are stored as double because r_sq is double; with the default
* float type the baseline would be truncated and the ratio for the own-year model
* would not be exactly 1.
bysort year_sample: egen double base_r_sq = mean(cond(year_model == year_sample, r_sq, .))

* Use the full variable name (not an abbreviation such as base_) so the command
* also works with -set varabbrev off- and cannot become ambiguous.
gen double rel_r_sq = r_sq / base_r_sq


/***********************************************************************
* 2. Plot the R-squared and relative R-squared
************************************************************************/
* Height of the shaded background bars; set to the top of the y-axis of the
* current plot (0.2 here) and overwritten before the next plot.
gen upper = 0.2

* -connected- joins points in data order, so order the rows by model year
* within each sample year.
sort year_sample year_model

/* Plot the R-squared from the regression of outcomes on predictions for the
   individuals of the 1998, 2006 and 2014 samples, using the models trained in
   each year from 1995 onwards (x-axis: model year). */

* Years drawn as grey background bars (2008, 2009 and 2012).
local shaded   inlist(year_sample, 2008, 2009, 2012)
* Restrict the plotted lines to models trained in 1995 or later.
local from1995 year_model>=1995

* NOTE(review): only 2006 gets a vertical reference line here, whereas the
* relative R-squared plot marks 1998, 2006 and 2014 - confirm this is intended.
twoway ///
	(bar upper year_sample if `shaded', bcolor(gs14) base(0)) ///
	(connected r_sq year_model if year_sample == 1998 & `from1995', msymbol(T) color(ebblue)) ///
	(connected r_sq year_model if year_sample == 2006 & `from1995', msymbol(O) color(orange_red)) ///
	(connected r_sq year_model if year_sample == 2014 & `from1995', msymbol(S) color(navy)) ///
	, ///
	xline(2006, lcolor(red) lpattern(dash) noextend) ///
	ytitle("R{superscript:2}", orientation(horizontal)) ///
	ylabel(0(0.05)0.2, format(%4.2f) angle(0)) ///
	yscale(titlegap(3)) ///
	xtitle("") ///
	xlabel(1995(4)2016) ///
	xscale(titlegap(1)) ///
	graphregion(color(white)) ///
	plotregion(margin(b=0 y=0)) ///
	legend(cols(3) order(2 "1998 sample" 3 "2006 sample" 4 "2014 sample") region(lwidth(none)) size(*0.8)) ///
	name(r_sq, replace)

graph export "${output}/${id_code}_6MJFR_R_squared_AllIndiv_2006Model_Full_from1995.pdf", as(pdf) replace

/* Plot the relative R-squared for the 1998, 2006 and 2014 samples: the R-squared
   obtained for the individuals of sample year s with the model trained in year t,
   divided by the R-squared obtained for the same individuals with the model
   trained in their own year s (x-axis: model year t, from 1995 onwards). */

* Background bars now have to reach the top of the 0-1 y-axis.
replace upper = 1

* Years drawn as grey background bars (2008, 2009 and 2012).
local shaded   inlist(year_sample, 2008, 2009, 2012)
* Restrict the plotted lines to models trained in 1995 or later.
local from1995 year_model>=1995

* One dashed reference line per sample, at the sample's own year and in the
* colour of the corresponding series.
* NOTE(review): the SMCL tags in ytitle() contain trailing spaces
* ("{subscript:t,s }", "{&frasl }") - confirm they render as intended.
twoway ///
	(bar upper year_sample if `shaded', bcolor(gs14) base(0)) ///
	(connected rel_r_sq year_model if year_sample == 1998 & `from1995', msymbol(T) color(ebblue)) ///
	(connected rel_r_sq year_model if year_sample == 2006 & `from1995', msymbol(O) color(orange_red)) ///
	(connected rel_r_sq year_model if year_sample == 2014 & `from1995', msymbol(S) color(navy)) ///
	, ///
	xline(1998, lcolor(ebblue) lpattern(dash) noextend) ///
	xline(2006, lcolor(orange_red) lpattern(dash) noextend) ///
	xline(2014, lcolor(navy) lpattern(dash) noextend) ///
	ytitle("R{superscript:2}{subscript:t,s } {&frasl } R{superscript:2}{subscript:s,s}") ///
	ylabel(0(0.2)1, angle(0)) ///
	yscale(titlegap(3)) ///
	xtitle("") ///
	xlabel(1995(4)2016) ///
	xscale(titlegap(1)) ///
	graphregion(color(white)) ///
	plotregion(margin(b=0 y=0)) ///
	legend(cols(3) order(2 "1998 sample" 3 "2006 sample" 4 "2014 sample") region(lwidth(none)) size(*0.8)) ///
	name(rel_r_sq, replace)

graph export "${output}/${id_code}_6MJFR_relative_R_squared_AllIndiv_2006Model_Full_from1995_3models.pdf", as(pdf) replace
